Read in the data
#normal data
ed <- fread(here::here("data/education/combined_education.csv")) %>%
mutate(year = factor(year))
ed$year <- revalue(ed$year,c("1415" = "2014-2015", "1516" = "2015-2016", "1617" = "2016-2017",
"1718" = "2017-2018", "1819" = "2018-2019"))
ed <- ed %>% filter(District.Name == "South Wasco County SD 1" |
District.Name == "Jefferson County SD 509J" |
District.Name == "North Wasco County SD 21" |
District.Name == "Sherman County SD" |
District.Name == "Dufur SD 29" |
District.Name == "Hood River County SD") %>%
select(-c("District.ID" ,"Student.Group","Fall.Membership", "Free.Reduced.Priced.Lunch","Percent.Regular.Attenders" ))
ed.increase <- ed %>% select("year" ,"District.Name","On.Time.Grad.Rate", "Teacher.Experience.Pct","Percent.ELA.Proficient.Change")
ed.decrease <- ed %>% select("year" ,"District.Name","Percent.Economically.Disadvantaged", "Percent.Chronically.Absent","Dropout.Rate")
#scaling the data within each measure to standardize the values.
#con: suppresses the minor differences in variables that have a smaller range
ed.scaled <- ed %>% mutate(Percent.ELA.Proficient.Change = scale(Percent.ELA.Proficient.Change),
Percent.Chronically.Absent = scale(Percent.Chronically.Absent),
Percent.Economically.Disadvantaged = scale(Percent.Economically.Disadvantaged),
Teacher.Experience.Pct = scale(Teacher.Experience.Pct),
Dropout.Rate = scale(Dropout.Rate),
On.Time.Grad.Rate = scale(On.Time.Grad.Rate))
ed.scaled <- ed.scaled %>% filter(District.Name == "South Wasco County SD 1" |
District.Name == "Jefferson County SD 509J" |
District.Name == "North Wasco County SD 21" |
District.Name == "Sherman County SD" |
District.Name == "Dufur SD 29" |
District.Name == "Hood River County SD")
prepare table to make a heatmap
## for all the education data with original values
ed.melt = melt(ed, id.vars = c("year", "District.Name"),
measure.vars = c("On.Time.Grad.Rate", "Dropout.Rate" ,
"Percent.ELA.Proficient.Change", "Teacher.Experience.Pct", "Percent.Chronically.Absent",
"Percent.Economically.Disadvantaged"))
## for the education data with scaled values
ed.melt.scaled = melt(ed.scaled, id.vars = c("year", "District.Name"),
measure.vars = c("On.Time.Grad.Rate", "Dropout.Rate" ,
"Percent.ELA.Proficient.Change", "Teacher.Experience.Pct", "Percent.Chronically.Absent",
"Percent.Economically.Disadvantaged"))
### increaseing variables
ed.melt.increase = melt(ed.increase, id.vars = c("year", "District.Name"),
measure.vars = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
"Percent.ELA.Proficient.Change")) %>%
mutate(variable = factor(variable, levels = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
"Percent.ELA.Proficient.Change")),
variable = recode(variable, "On.Time.Grad.Rate" = "On Time Graduation",
"Teacher.Experience.Pct" = "Teacher Experience",
"Percent.ELA.Proficient.Change" = "ELA Proficiency Change"))
ed.melt.decrease = melt(ed.decrease, id.vars = c("year", "District.Name"),
measure.vars = c("Percent.Economically.Disadvantaged", "Percent.Chronically.Absent",
"Dropout.Rate")) %>%
mutate(variable = recode(variable, "Percent.Economically.Disadvantaged" = "Economically Disadvantaged",
"Percent.Chronically.Absent" = "Chronic Absenteeism",
"Dropout.Rate"="Dropout Rate"))
try making divergent red, white and blue color palette
img <- function(obj, nam) {
image(1:length(obj), 1, as.matrix(1:length(obj)), col=obj,
main = nam, ylab = "", xaxt = "n", yaxt = "n", bty = "n")
}
rwb <- colorRampPalette(colors = c("red", "white", "blue"))
img(rwb(100), "red-white-blue")
colorRampPalette(c("red", "blue"))(25)
## [1] "#FF0000" "#F4000A" "#E90015" "#DF001F" "#D4002A" "#C90035" "#BF003F"
## [8] "#B4004A" "#AA0055" "#9F005F" "#94006A" "#8A0074" "#7F007F" "#74008A"
## [15] "#6A0094" "#5F009F" "#5500AA" "#4A00B4" "#3F00BF" "#3500C9" "#2A00D4"
## [22] "#1F00DF" "#1500E9" "#0A00F4" "#0000FF"
for south wasco alone:
sw <- filter(ed.melt, District.Name == "South Wasco County SD 1")
ggplot(sw, aes(y = variable, x = factor(year), fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
scale_fill_viridis() +
coord_equal()
Facet wrap to show all schools
ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
scale_fill_viridis(option = "D") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).
Trying with rwb divergent palette: not working - Continuous value supplied to discrete scale.
continuous scale palettes: https://biostats.w.uib.no/color-scale-for-continuous-variables/ controlling midpoint and range for color palettes: https://stackoverflow.com/questions/58718527/setting-midpoint-for-continuous-diverging-color-scale-on-a-heatmap
# interval <- c(-60, -20, -10, -5, seq(0,100,5))
# data.values <- as.vector(na.omit(ed.melt$value))
# color_rwb <- cut(data.values, breaks=interval, labels = rwb(24))
pgcol <- brewer.pal(9, "PRGn")
pgpal <- colorRampPalette(pgcol)
ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0)
## Warning: Removed 12 rows containing missing values (geom_text).
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab("School Year")
## List of 2
## $ axis.text.x:List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : num 0.5
## ..$ angle : num 90
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi FALSE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ x : chr "School Year"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
ggplot(ed.melt.increase, aes(y = variable, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent")
## Warning: Removed 12 rows containing missing values (geom_text).
# and all in one instead of facet
ed.melt.increase2 <- ed.melt.increase %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.increase2, aes(y = indicator, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
## banners
geom_hline(yintercept = 6.5, color = "white", lwd = 5) +
geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
geom_hline(yintercept = 18.5, color = "white", lwd = 5) +
# Title of variables
annotate("text", x=2, y=6.5,label="ELA Proficiency Change", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=12.5,label="On Time Graduation", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=18.5,label="Teacher Experience", fontface=2, size = 3, color="black") +
#scale_y_discrete(limit = rep(c("South Wasco County SD 1", "Sherman County SD" , "North Wasco County SD 21",
#"Jefferson County SD 509J", "Hood River County SD","Dufur SD 29"), 3)) +
xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).
ggplotly(ggplot(ed.melt.decrease, aes(y = variable, x = year, fill = value,
text = paste0("School District: ", District.Name,
"<br>Year: ", year,
"<br>Percent ", variable, ": ", value, "%"))) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_sequential(palette = "Purples 3") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title = "Barriers to Student Success",
x ="School Year", y = "", fill="Percent"),
tooltip = "text")%>%
config(displayModeBar = "static", displaylogo = FALSE,
modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d","hoverClosestCartesian",
"hoverCompareCartesian","resetScale2d"))
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# and all in one instead of facet
ed.melt.decrease2 <- ed.melt.decrease %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.decrease2, aes(y = indicator, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_sequential(palette = "Purples 3") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
## banners
geom_hline(yintercept = 6.5, color = "white", lwd = 5) +
geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
geom_hline(yintercept = 18.5, color = "white", lwd = 5) +
# Title of variables
annotate("text", x=2, y=6.5,label="Chronic Absenteeism", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=12.5,label="Dropout", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=18.5,label="Economically Disadvantaged", fontface=2, size = 3, color="black") +
xlab("School Year")
get everything absolutely on one plot (not faceted)
ed.melt2 <- ed.melt %>% mutate(indicator = paste(variable, District.Name, sep= "-"),
indicator = factor(indicator,
levels=c("On.Time.Grad.Rate-South Wasco County SD 1",
"On.Time.Grad.Rate-North Wasco County SD 21",
"On.Time.Grad.Rate-Dufur SD 29",
"On.Time.Grad.Rate-Hood River County SD" ,
"On.Time.Grad.Rate-Sherman County SD",
"On.Time.Grad.Rate-Jefferson County SD 509J",
"Dropout.Rate-South Wasco County SD 1",
"Dropout.Rate-North Wasco County SD 21",
"Dropout.Rate-Dufur SD 29",
"Dropout.Rate-Hood River County SD" ,
"Dropout.Rate-Sherman County SD",
"Dropout.Rate-Jefferson County SD 509J",
"Percent.ELA.Proficient.Change-South Wasco County SD 1",
"Percent.ELA.Proficient.Change-North Wasco County SD 21",
"Percent.ELA.Proficient.Change-Dufur SD 29",
"Percent.ELA.Proficient.Change-Hood River County SD" ,
"Percent.ELA.Proficient.Change-Sherman County SD",
"Percent.ELA.Proficient.Change-Jefferson County SD 509J",
"Teacher.Experience.Pct-South Wasco County SD 1",
"Teacher.Experience.Pct-North Wasco County SD 21",
"Teacher.Experience.Pct-Dufur SD 29",
"Teacher.Experience.Pct-Hood River County SD" ,
"Teacher.Experience.Pct-Sherman County SD",
"Teacher.Experience.Pct-Jefferson County SD 509J",
"Percent.Chronically.Absent-South Wasco County SD 1",
"Percent.Chronically.Absent-North Wasco County SD 21",
"Percent.Chronically.Absent-Dufur SD 29",
"Percent.Chronically.Absent-Hood River County SD" ,
"Percent.Chronically.Absent-Sherman County SD",
"Percent.Chronically.Absent-Jefferson County SD 509J",
"Percent.Economically.Disadvantaged-South Wasco County SD 1",
"Percent.Economically.Disadvantaged-North Wasco County SD 21",
"Percent.Economically.Disadvantaged-Dufur SD 29",
"Percent.Economically.Disadvantaged-Hood River County SD" ,
"Percent.Economically.Disadvantaged-Sherman County SD",
"Percent.Economically.Disadvantaged-Jefferson County SD 509J")))
ggplot(ed.melt2, aes(y = indicator, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
#geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_viridis(n=25, option = "D",breaks = c(-60, -20, -10, -5, seq(0,100,5))) +
scale_fill_viridis(option = "D") +
# Horizontal lines to section off domains
geom_hline(yintercept = 6.5, color = "white", lwd = 4) +
geom_hline(yintercept = 12.5, color = "white", lwd = 4) +
geom_hline(yintercept = 18.5, color = "white", lwd = 4) +
geom_hline(yintercept = 24.5, color = "white", lwd = 4) +
geom_hline(yintercept = 30.5, color = "white", lwd = 4) +
geom_hline(yintercept = 36.5, color = "white", lwd = 4) +
# Title of variables
annotate("text", x=2, y=6.5,label="Dropout", fontface=2, size = 2, color="black") +
annotate("text", x=2, y=12.5,label="On Time Graduation", fontface=2, size = 2, color="black") +
annotate("text", x=2, y=18.5,label="Chronic Absenteeism", fontface=2, size = 2, color="black") +
annotate("text", x=2, y=24.5,label="Economically Disadvantaged", fontface=2, size = 2, color="black") +
annotate("text", x=2, y=30.5,label="ELA Proficiency Change 8th-3rd Grade", fontface=2, size = 2, color="black") +
annotate("text", x=2, y=36.5,label="Teacher Experience", fontface=2, size = 2, color="black") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab("School Year")
Makes it alot easier to do direct comparisons of schools and years for one variable. but interpretability is still weak.
Facet wrap but with scaled/standardized measures
ggplot(ed.melt.scaled, aes(y = variable, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
#scale_fill_viridis(breaks = c(-60, -20, -10, -5, seq(0,100,5))) +
scale_fill_viridis() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).